*****************************************************************
* 
* Purpose:	
*
*	Construct the tables and figures for MS35905
*	"Grantmaking, Grading on a Curve, and the Paradox of Relative Evaluation in Nonmarkets" 
*   Jérôme Adda and Marco Ottaviani
*   
*   
*
* Input:
*   ERC data:   Success rates .xlsx
*	    	    erc2020_complete.xlsx
*
*	RCN data:   reliability.dta; RCN_Predicted.dta	
*
* Output:
*
*       Tables I, II, IV and Figures I, VIII, IX and XV in the main paper 
*
*
* Author:	
*
*       Jerome Adda and Marco Ottaviani
*
* Date created: 
*	20 Aug 2023
*	
********************************************************************* 	

clear
**** Load data on research field 
global path "/Users/`c(username)'/Dropbox/ValutazioneBocconi_ERC/ERC/ReplicationQJE"

cd "$path"
* Set to one to display the graphs
local graph=1

clear
use reliability

** Figure IX
if `graph'==1 {
preserve
use reliability,clear
graph hbar (asis) Gwet, over(panel2) over(panelcodeERC, label(labsize(small)) sort(Gwet)) ytitle(Inter-rater agreement (Gwet's AC)) exclude0  ymtick(##4) scheme(s1color) asyvars bar(1,color(navy))  bar(2,color(red))  bar(3,color(green)) bargap(-100) legend(cols(1) order(1 "Life Sciences" 2 "Physical Sciences & Engineering" 3 "Social Sciences & Humanities"))
graph export FigureIX.pdf ,replace
restore
}

*** Table IV

mat TableIV_b=KAPPA[1..25,1], KAPPA[1..25,3], KAPPA[1..25,4],KAPPA[1..25,5],KAPPA[1..25,13]
mat TableIV_se=KAPPA[1..25,7], KAPPA[1..25,9], KAPPA[1..25,10],KAPPA[1..25,11],J(25,1,.)

mat rownames TableIV_b= SH01 SH02 SH03 SH04 SH05 SH06 LS01 LS02 LS03 LS04 LS05 LS06 LS07 LS08 LS09 PE01 PE02 PE03 PE04 PE05 PE06 PE07 PE08 PE09 PE10
mat rownames TableIV_se= SH01 SH02 SH03 SH04 SH05 SH06 LS01 LS02 LS03 LS04 LS05 LS06 LS07 LS08 LS09 PE01 PE02 PE03 PE04 PE05 PE06 PE07 PE08 PE09 PE10

est2tex TableIV, digit(3) replace dropall preserve fancy mark(starb) level(90 95 99) leadzero collabels(c1 c2 c3 c4 c5)


**************************************************************************

clear
import excel "$path/Success rates .xlsx", sheet("data") cellrange(A3:F1793) firstrow
save SuccessRates_from_Excell,replace
use SuccessRates_from_Excell
keep if GrantedorEvaluated=="Evaluated"
rename Numberofprojects evaluated
drop GrantedorEvaluated
rename Callyear year
sort year Type Panel
save ERCevaluated,replace

u SuccessRates_from_Excell
keep if GrantedorEvaluated=="Granted"
rename Numberofprojects granted
drop GrantedorEvaluated
rename Callyear year
sort year Type Panel
save ERCgranted,replace
merge year Type Panel using ERCevaluated
drop _merge
gen success= granted/evaluated
rename Panel panelcodeERC
gen grant=.
replace grant=1 if Type=="Starting Grant"
replace grant=2 if Type=="Consolidator Grant"
replace grant=3 if Type=="Advanced Grant"
drop Type
replace panelcodeERC=substr(panelcodeERC,1,2)+"0"+substr(panelcodeERC,3,3) if length(panelcodeERC)==3
sort year panelcodeERC grant 
save ERCSuccess,replace


************************************************************
clear
import excel "$path/erc2020_complete.xlsx", sheet("Raw data") firstrow
rename *,lower
gen grant=substr(granttype,1,1)
destring grant,replace
* construct the budget by year, grant type and panel
collapse (sum) budget (first) paneldescription,by(year grant panel)
rename panel panelcodeERC

sort year panelcodeERC grant 
merge year panelcodeERC grant using ERCSuccess
drop _merge
egen tot_eval=sum(evaluated)
egen tot_eval_year=sum(evaluated),by(year)
egen tot_eval_year_grant=sum(evaluated),by(year grant)
egen tot_eval_year_grant_domain=sum(evaluated),by(year grant Researchdomain)

mat TABLEIIb_b=J(5,5,.)
mat TABLEIIb_se=J(5,5,.)

di "Number of applications per year& "
su tot_eval_year [fw=evaluated]
mat TABLEIIb_b[1,1]=r(N)
su year
mat TABLEIIb_b[2,2]=r(max)-r(min)+1
mat TABLEIIb_b[2,4]=r(min)
mat TABLEIIb_b[2,5]=r(max)

di "Success rate& "
preserve
collapse (sum) granted evaluated ,by(year)
gen  success=granted/evaluated
su evaluated
mat TABLEIIb_b[3,2]=round(r(mean),1)
mat TABLEIIb_b[3,3]=round(r(sd),1)
mat TABLEIIb_b[3,4]=r(min)
mat TABLEIIb_b[3,5]=r(max)
su success
mat TABLEIIb_b[4,2]=round(r(mean),0.01)
mat TABLEIIb_b[4,3]=round(r(sd),0.01)
mat TABLEIIb_b[4,4]=round(r(min),0.01)
mat TABLEIIb_b[4,5]=round(r(max),0.01)
restore
gen bud_per_grant= budget/granted
di "Budget per grant (euros) & "
su bud_per_grant [fw=granted]
mat TABLEIIb_b[5,1]=round(r(N),1)
mat TABLEIIb_b[5,2]=round(r(mean),1)
mat TABLEIIb_b[5,3]=round(r(sd),1)
mat TABLEIIb_b[5,4]=round(r(min),1)
mat TABLEIIb_b[5,5]=round(r(max),1)

** Table II
mat rownames TABLEIIb_b = Number~of~applications Number~of~years Number~of~applications~per~year Success~rate Budget~per~successful~grant
mat colnames TABLEIIb_b = c1 c2 c3 c4 c5
est2tex TABLEIIb, digit(3) replace dropall preserve suppress collabels(Obs Mean Std.~Dev. Min Max)



sort panelcodeERC
merge panelcodeERC using reliability
tab _merge
drop _merge
* construct the term sigma_i-\bar{sigma}_it
foreach var of varlist PctAgree Brennan Cohen Fleiss Gwet Krippendorff {
gen dev`var'=-devPre`var'
replace dev`var'=-devPost`var' if year>2015
}

drop if year==2007

egen total=sum(budget),by(year grant)
gen ratio=budget/total*100
sort grant panelcodeERC year
by grant panelcodeERC:gen init=ratio[1]
gen ratio2=ratio/init
tab grant,gen(Grant)
gen applic=evaluated/tot_eval_year_grant

egen grp=group(panelcodeERC grant)
qui tab grp,gen(Group)
xtset grp year, yearly
qui tab year,gen(Year)

** Figure I
if `graph'==1 {
preserve	
drop if year<2009
collapse (sum) budget, by(year panel2)
egen total=sum(budget),by(year)
gen ratio=budget/total

sort panel2 year
by panel2: gen init=ratio[1]
gen ratio2=ratio/init

twoway (line ratio year if panel2=="LS",  lwidth(thick) lpattern(longdash) lcolor(navy))  (line ratio year if panel2=="PE", lwidth(thick) lcolor(red)) (line ratio year if panel2=="SH", lwidth(thick) lpattern(dash) lcolor(green)) ,  xline(2014, lpattern(dash))  legend(cols(1) order(1 "Life Sciences" 2 "Physical Sciences & Engineering" 3 "Social Sciences & Humanities")) xtitle(Year) ytitle("Share of Budget")  scheme(s1color)
graph export FigureI.pdf, replace
restore
}

** Figure VIII
if `graph'==1 {
preserve
gen period=year
recode period 2009/2013=1 2016/2021=2 else=.
drop if period==.
collapse (mean) ratio,by(period panelcodeERC)
reshape wide ratio,i(panelcode) j(period)
gen change=ratio2-ratio1
gen panel2=substr(panelcodeERC,1,2)
gen relchange=change/ratio1*100
graph hbar (asis) relchange, over(panel2) over(panelcodeERC, label(labsize(small)) sort(relchange)) ytitle(Percent Change 2009-2013 vs 2016-2021) yline(0) ymtick(##4) scheme(s1color) asyvars bar(1,color(navy))  bar(2,color(red))  bar(3,color(green)) bargap(-100) legend(cols(1) order(1 "Life Sciences" 2 "Physical Sciences & Engineering" 3 "Social Sciences & Humanities"))

graph export FigureVIII.pdf, replace
restore
}


**************************************************************************************
**   Table I
**************************************************************************************

gen ratio_=ratio
gen success_=success
foreach var of varlist ratio applic success devPctAgree devBrennan devCohen devFleiss devGwet devKrippendorff {
su `var'
replace `var'=`var'/r(sd)	
}
mat RATIO_b=J(3,5,0)
mat RATIO_se=J(3,5,0)
mat PROB_b=J(3,5,0)
mat PROB_se=J(3,5,0)
mat APPLI_b=J(3,5,0)
mat APPLI_se=J(3,5,0)

****  BUDGET RATIOS  *****
** POOLED OVER ALL DISCIPLINES
* Percent Agreement
xtpcse ratio devPctAgree Group* ,corr(ar1)
mat b=get(_b)
mat RATIO_b[1,1]=round(b[1,1],0.0001)
mat vce=get(VCE)
mat RATIO_se[1,1]=round(sqrt(vce[1,1]),0.00001)
mat RATIO_b[2,1]=round(e(r2),0.001)
mat RATIO_b[3,1]=e(N)

* Cohen Kappa
*xtpcse ratio PostCohen Post Group* ,corr(ar1)
xtpcse ratio devCohen Group* ,corr(ar1)
mat b=get(_b)
mat RATIO_b[1,2]=round(b[1,1],0.0001)
mat vce=get(VCE)
mat RATIO_se[1,2]=round(sqrt(vce[1,1]),0.00001)
mat RATIO_b[2,2]=round(e(r2),0.001)
mat RATIO_b[3,2]=e(N)

* Fleiss Kappa
xtpcse ratio devFleiss Group* ,corr(ar1)
mat b=get(_b)
mat RATIO_b[1,3]=round(b[1,1],0.0001)
mat vce=get(VCE)
mat RATIO_se[1,3]=round(sqrt(vce[1,1]),0.00001)
mat RATIO_b[2,3]=round(e(r2),0.001)
mat RATIO_b[3,3]=e(N)

* Gwet AC
xtpcse ratio devGwet Group* ,corr(ar1)
mat b=get(_b)
mat RATIO_b[1,4]=round(b[1,1],0.0001)
mat vce=get(VCE)
mat RATIO_se[1,4]=round(sqrt(vce[1,1]),0.00001)
mat RATIO_b[2,4]=round(e(r2),0.001)
mat RATIO_b[3,4]=e(N)

* Brennan AC
xtpcse ratio devBrennan Group* ,corr(ar1)
mat b=get(_b)
mat RATIO_b[1,5]=round(b[1,1],0.0001)
mat vce=get(VCE)
mat RATIO_se[1,5]=round(sqrt(vce[1,1]),0.00001)
mat RATIO_b[2,5]=round(e(r2),0.001)
mat RATIO_b[3,5]=e(N)

****  APPLICANTS  *****
** POOLED OVER ALL DISCIPLINES
* Percent Agreement
xtpcse applic devPctAgree Group* ,corr(ar1)
mat b=get(_b)
mat APPLI_b[1,1]=round(b[1,1],0.0001)
mat vce=get(VCE)
mat APPLI_se[1,1]=round(sqrt(vce[1,1]),0.00001)
mat APPLI_b[2,1]=round(e(r2),0.001)
mat APPLI_b[3,1]=e(N)

* Cohen Kappa
xtpcse applic devCohen Group* ,corr(ar1)
mat b=get(_b)
mat APPLI_b[1,2]=round(b[1,1],0.0001)
mat vce=get(VCE)
mat APPLI_se[1,2]=round(sqrt(vce[1,1]),0.00001)
mat APPLI_b[2,2]=round(e(r2),0.001)
mat APPLI_b[3,2]=e(N)

* Fleiss Kappa
xtpcse applic devFleiss Group* ,corr(ar1)
mat b=get(_b)
mat APPLI_b[1,3]=round(b[1,1],0.0001)
mat vce=get(VCE)
mat APPLI_se[1,3]=round(sqrt(vce[1,1]),0.00001)
mat APPLI_b[2,3]=round(e(r2),0.001)
mat APPLI_b[3,3]=e(N)

* Gwet AC
xtpcse applic devGwet Group* ,corr(ar1)
mat b=get(_b)
mat APPLI_b[1,4]=round(b[1,1],0.0001)
mat vce=get(VCE)
mat APPLI_se[1,4]=round(sqrt(vce[1,1]),0.00001)
mat APPLI_b[2,4]=round(e(r2),0.001)
mat APPLI_b[3,4]=e(N)

* Brennan AC
xtpcse applic devBrennan Group* ,corr(ar1)
mat b=get(_b)
mat APPLI_b[1,5]=round(b[1,1],0.0001)
mat vce=get(VCE)
mat APPLI_se[1,5]=round(sqrt(vce[1,1]),0.00001)
mat APPLI_b[2,5]=round(e(r2),0.001)
mat APPLI_b[3,5]=e(N)


mat TABLE1_b=APPLI_b[1,1..5]',RATIO_b[1,1..5]'
mat TABLE1_se=APPLI_se[1,1..5]',RATIO_se[1,1..5]'
mat TABLE1_b=TABLE1_b \ (APPLI_b[3,1],RATIO_b[3,1])
mat TABLE1_se=TABLE1_se \ J(1,2,.)

mat rownames TABLE1_b = Percent~agreement Cohen~kappa Fleiss~kappa Gwet~AC Brennan~AC Observations
mat colnames TABLE1_b = c1 c2
est2tex TABLE1, digit(3) replace dropall preserve fancy mark(starb) level(90 95 99) leadzero collabels(c1 c2)



***** Figure XV

gen after=year>2016
encode panelcode,gen(panel)
drop Year7


areg ratio Year* if year<=2020&Researchdomain=="PE",a(grant)
mat bPE=get(_b)'
mat bPE=bPE[1..6,1] \ 0 \ bPE[7..12,1]
mat vce=get(VCE)
mat stdPE=J(13,1,0)
for num 1/6:mat stdPE[X,1]=sqrt(vce[X,X])
for num 8/13:mat stdPE[X,1]=sqrt(vce[X-1,X-1])

areg ratio Year* if year<=2020&Researchdomain=="SH",a(grant)
mat bSH=get(_b)'
mat bSH=bSH[1..6,1] \ 0 \ bSH[7..12,1]
mat vce=get(VCE)
mat stdSH=J(13,1,0)
for num 1/6:mat stdSH[X,1]=sqrt(vce[X,X])
for num 8/13:mat stdSH[X,1]=sqrt(vce[X-1,X-1])

areg ratio Year* if year<=2020&Researchdomain=="LS",a(grant)
mat bLS=get(_b)'
mat bLS=bPE[1..6,1] \ 0 \ bLS[7..12,1]
mat vce=get(VCE)
mat stdLS=J(13,1,0)
for num 1/6:mat stdLS[X,1]=sqrt(vce[X,X])
for num 8/13:mat stdLS[X,1]=sqrt(vce[X-1,X-1])

preserve
svmat bPE 
svmat bSH
svmat bLS
svmat stdPE 
svmat stdSH
svmat stdLS
gen bPEl=bPE1-1.96*stdPE
gen bPEh=bPE1+1.96*stdPE
gen bSHl=bSH1-1.96*stdSH
gen bSHh=bSH1+1.96*stdSH
gen bLSl=bLS1-1.96*stdLS
gen bLSh=bLS1+1.96*stdLS
gen time=2007+_n if _n<14

gen timeH=time+0.1
gen timeL=time-0.1

twoway (rarea bPEl bPEh timeL, fcolor(%20) lwidth(none)) (rarea bSHl bSHh timeH, fcolor(%20) lwidth(none)) (rarea bLSl bLSh time, fcolor(%20) lwidth(none)), yline(0, lwidth(medthick) lpattern(dot)) xtitle(Year) legend(order(1 "PE" 2 "SH" 3 "LS")) xline(2015,lwidth(medthick) lpattern(dot)) scheme(s1color) title(Budget Shares)  xlabel(2008 2010 2012 2014 2016 2018 2020)

graph export FigureXVb.pdf, replace
restore



areg applic Year* if year<=2020&Researchdomain=="PE",a(grant)
mat bPE=get(_b)'
mat bPE=bPE[1..6,1] \ 0 \ bPE[7..12,1]
mat vce=get(VCE)
mat stdPE=J(13,1,0)
for num 1/6:mat stdPE[X,1]=sqrt(vce[X,X])
for num 8/13:mat stdPE[X,1]=sqrt(vce[X-1,X-1])

areg applic Year* if year<=2020&Researchdomain=="SH",a(grant)
mat bSH=get(_b)'
mat bSH=bSH[1..6,1] \ 0 \ bSH[7..12,1]
mat vce=get(VCE)
mat stdSH=J(13,1,0)
for num 1/6:mat stdSH[X,1]=sqrt(vce[X,X])
for num 8/13:mat stdSH[X,1]=sqrt(vce[X-1,X-1])

areg applic Year* if year<=2020&Researchdomain=="LS",a(grant)
mat bLS=get(_b)'
mat bLS=bPE[1..6,1] \ 0 \ bLS[7..12,1]
mat vce=get(VCE)
mat stdLS=J(13,1,0)
for num 1/6:mat stdLS[X,1]=sqrt(vce[X,X])
for num 8/13:mat stdLS[X,1]=sqrt(vce[X-1,X-1])

preserve
svmat bPE 
svmat bSH
svmat bLS
svmat stdPE 
svmat stdSH
svmat stdLS
gen bPEl=bPE1-1.96*stdPE
gen bPEh=bPE1+1.96*stdPE
gen bSHl=bSH1-1.96*stdSH
gen bSHh=bSH1+1.96*stdSH
gen bLSl=bLS1-1.96*stdLS
gen bLSh=bLS1+1.96*stdLS
gen time=2007+_n if _n<14

gen timeH=time+0.1
gen timeL=time-0.1

twoway (rarea bPEl bPEh timeL, fcolor(%20) lwidth(none)) (rarea bSHl bSHh timeH, fcolor(%20) lwidth(none)) (rarea bLSl bLSh time, fcolor(%20) lwidth(none)), yline(0, lwidth(medthick) lpattern(dot)) xtitle(Year) legend(order(1 "PE" 2 "SH" 3 "LS")) xline(2015,lwidth(medthick) lpattern(dot)) scheme(s1color) title(Grant Applications)  xlabel(2008 2010 2012 2014 2016 2018 2020)
graph export FigureXVa.pdf, replace
restore



** Figure XIV
clear
use RCN_Predicted
* Place PE10 after PE9
gen new=substr(predERCcode,1,2)+"0"+substr(predERCcode,3,1)
replace new=predERCcode if predERCcode=="PE10"
replace predERCcode=new
drop new

sort predERC
encode predERC,gen(Assigned)

qui for var pe* ls* sh*:replace X=0 if X==.
gen i=_n

for num 1/9:rename lsX probX
rename pe1 prob10
rename pe2 prob11
rename pe3 prob12
rename pe4 prob13
rename pe5 prob14
rename pe6 prob15
rename pe7 prob16
rename pe8 prob17
rename pe9 prob18
rename pe10 prob19
rename sh1 prob20
rename sh2 prob21
rename sh3 prob22
rename sh4 prob23
rename sh5 prob24
rename sh6 prob25

reshape long prob,i(i) j(erc)

gen potential=predERCcode if i==erc
sort erc potential
by erc:replace potential=potential[_N] if potential==""
sort pred erc
encode potential,gen(Potential)

gen prob1=prob>=0.9
gen prob2=prob>=0.8&prob<0.9
gen prob3=prob>=0.6&prob<0.8
gen prob4=prob>=0.1&prob<0.6
gen prob5=prob>=0.05&prob<0.1
gen prob6=prob>=0&prob<0.05

twoway (scatter Assigned Potential if prob1==1, mcolor(black)) /// 
       (scatter Assigned Potential if prob2==1, mcolor(dknavy)) /// 
	   (scatter Assigned Potential if prob3==1, mcolor(blue%80)) ///
	   (scatter Assigned Potential if prob4==1, mcolor(midblue%60)) ///
	   (scatter Assigned Potential if prob5==1, mcolor(midblue%40)) ///
	   (scatter Assigned Potential if prob6==1, mcolor(navy%10)), /// 
	   ytitle(`"Other Possible Panels"') ylabel(#25, labels labsize(tiny) angle(horizontal) valuelabel) xtitle(`"Assigned Panel"') xlabel(#25, labsize(tiny) angle(forty_five) valuelabel) legend(order(1 "Prob in [0.9,1]" 2 "Prob in [0.8,0.9]" 3 "Prob in [0.6,0.8]"  4 "Prob in [0.1,0.6]"  5 "Prob in [0.05,0.1]"  6 "Prob in [0,0.05]" )) scheme(s1color) aspectratio(1)
graph export FigureXV.pdf,replace


** clean up temporary files
global filename "ERCevaluated.dta ERCgranted.dta ERCSuccess.dta SuccessRates_from_Excell.dta TABLE1_tbl.dta TABLEIIb_tbl.dta TableIV_tbl.dta"
foreach file of global filename {
erase `file'	
}

